{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7a63bafe-7bcd-45c3-bf4e-8ef034fc830d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.keys import Keys\n",
    "from selenium.webdriver.common.by import By"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "2fdda1d6-abd1-428c-9048-20b136216060",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "选电视剧\n"
     ]
    }
   ],
   "source": [
    "# Start a Firefox session and open the Douban TV page.\n",
    "driver = webdriver.Firefox()\n",
    "# Configure the implicit wait before any element lookup: it is a session-wide\n",
    "# timeout, so every find_element call made afterwards keeps polling for up to\n",
    "# 5 s instead of failing at once when its target is not in the DOM yet.\n",
    "driver.implicitly_wait(5)\n",
    "driver.get(\"https://movie.douban.com/tv/\")\n",
    "# Print the page title as a quick check that the page was opened.\n",
    "print(driver.title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "bc64e037-33a1-430b-bd4b-23ad008ec60e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Type filter: locate it through its absolute XPath, then click it.\n",
    "xpath_lx = \"/html/body/div[3]/div[1]/div/div[1]/div/div/div[1]/div/div[1]/div[2]\"\n",
    "elem_lx = driver.find_element(by=By.XPATH, value=xpath_lx)\n",
    "elem_lx.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "089f805a-1cb8-48f7-a3f7-afe734abb113",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Chinese-language option: locate it through its absolute XPath, then click it.\n",
    "xpath_hy = \"/html/body/div[3]/div[1]/div/div[1]/div/div/div[1]/div/div[1]/div[2]/div/div[2]/div/div/ul/li[2]/span\"\n",
    "elem_hy = driver.find_element(by=By.XPATH, value=xpath_hy)\n",
    "elem_hy.click()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "04ae9f1f-8583-4cf8-9b97-3ccd90e6f49d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "20\n"
     ]
    }
   ],
   "source": [
    "# implicitly_wait does not pause execution: it sets how long element lookups\n",
    "# keep polling (up to 5 s here) before giving up.\n",
    "driver.implicitly_wait(5)\n",
    "# Find the result container, then every li entry inside it.\n",
    "elem_list_container = driver.find_element(By.CLASS_NAME,\"explore-list\")\n",
    "elem_list = elem_list_container.find_elements(By.TAG_NAME,\"li\")\n",
    "# find_elements returns an empty list rather than raising, so stop here instead\n",
    "# of letting the following cells silently produce an empty result.\n",
    "assert elem_list, \"no li entries found under .explore-list\"\n",
    "print(len(elem_list))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "f58aace0-9fa2-4dff-a930-8275c4ac87dd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[{'name': '繁花', 'subtitle': '2023 / 中国大陆 / 剧情 爱情 / 王家卫 / 胡歌 马伊琍', 'src': 'https://img3.doubanio.com/view/photo/m_ratio_poster/public/p2902705337.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/34874646', 'score': '8.4'}, {'name': '三大队', 'subtitle': '2023 / 中国大陆 / 动作 悬疑 犯罪 / 邢键钧 / 秦昊 李乃文', 'src': 'https://img1.doubanio.com/view/photo/m_ratio_poster/public/p2902230708.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/36178641', 'score': '7.1'}, {'name': '脱轨', 'subtitle': '2023 / 中国大陆 / 剧情 爱情 奇幻 / 沈阳 / 刘浩存 林一', 'src': 'https://img9.doubanio.com/view/photo/m_ratio_poster/public/p2901745424.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/36016469', 'score': '6.6'}, {'name': '鸣龙少年', 'subtitle': '2023 / 中国大陆 / 剧情 / 丁黑 / 张若昀 黄尧', 'src': 'https://img1.doubanio.com/view/photo/m_ratio_poster/public/p2901660879.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35873858', 'score': '7.0'}, {'name': '莲花楼', 'subtitle': '2023 / 中国大陆 / 剧情 悬疑 武侠 古装 / 郭虎 任海涛 / 成毅 曾舜晞', 'src': 'https://img9.doubanio.com/view/photo/m_ratio_poster/public/p2895958304.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35633163', 'score': '8.5'}, {'name': '漫长的季节', 'subtitle': '2023 / 中国大陆 / 剧情 家庭 犯罪 / 辛爽 / 范伟 秦昊', 'src': 'https://img9.doubanio.com/view/photo/m_ratio_poster/public/p2890906384.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35588177', 'score': '9.4'}, {'name': '新闻女王', 'subtitle': '2023 / 中国香港 中国大陆 / 剧情 / 陈海斌 姜振杰 / 佘诗曼 马国明', 'src': 'https://img1.doubanio.com/view/photo/m_ratio_poster/public/p2900220099.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/36140095', 'score': '7.8'}, {'name': '神隐', 'subtitle': '2023 / 中国大陆 / 剧情 爱情 奇幻 古装 / 陈家霖 李才 / 赵露思 王安宇', 'src': 'https://img1.doubanio.com/view/photo/m_ratio_poster/public/p2901821390.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35861086', 'score': '6.1'}, {'name': '一念关山', 'subtitle': '2023 / 中国大陆 / 动作 古装 / 周靖韬 邹曦 
/ 刘诗诗 刘宇宁', 'src': 'https://img3.doubanio.com/view/photo/m_ratio_poster/public/p2901264492.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35797771', 'score': '6.5'}, {'name': '天启异闻录', 'subtitle': '2023 / 中国大陆 / 剧情 冒险 古装 / 路阳 乔磊 范川 / 黄轩 吴樾', 'src': 'https://img3.doubanio.com/view/photo/m_ratio_poster/public/p2902566883.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35744499', 'score': '5.1'}, {'name': '繁城之下', 'subtitle': '2023 / 中国大陆 / 悬疑 犯罪 古装 / 王铮 / 白宇帆 宁理', 'src': 'https://img2.doubanio.com/view/photo/m_ratio_poster/public/p2899317221.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35725842', 'score': '8.5'}, {'name': '狂飙', 'subtitle': '2023 / 中国大陆 / 剧情 犯罪 / 徐纪周 / 张译 张颂文', 'src': 'https://img2.doubanio.com/view/photo/m_ratio_poster/public/p2886376181.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35465232', 'score': '8.5'}, {'name': '我知道我爱你', 'subtitle': '2023 / 中国大陆 / 剧情 / 吕聿来 / 张晚意 孙怡', 'src': 'https://img9.doubanio.com/view/photo/m_ratio_poster/public/p2902408635.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35693264', 'score': '6.2'}, {'name': '一闪一闪亮星星', 'subtitle': '2022 / 中国大陆 / 爱情 悬疑 奇幻 / 陈小明 章攀 / 张佳宁 屈楚萧', 'src': 'https://img3.doubanio.com/view/photo/m_ratio_poster/public/p2867203983.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35087769', 'score': '6.8'}, {'name': '故乡，别来无恙', 'subtitle': '2023 / 中国大陆 / 剧情 / 彭晨 王光 / 任素汐 李雪琴', 'src': 'https://img9.doubanio.com/view/photo/m_ratio_poster/public/p2893477705.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/36059766', 'score': '8.2'}, {'name': '三体', 'subtitle': '2023 / 中国大陆 / 剧情 科幻 / 杨磊 / 张鲁一 于和伟', 'src': 'https://img2.doubanio.com/view/photo/m_ratio_poster/public/p2886492021.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/26647087', 'score': '8.7'}, {'name': '后宫·甄嬛传', 'subtitle': '2011 / 中国大陆 / 剧情 古装 / 郑晓龙 / 孙俪 陈建斌', 'src': 
'https://img3.doubanio.com/view/photo/m_ratio_poster/public/p1480046723.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/4922787', 'score': '9.4'}, {'name': '古相思曲', 'subtitle': '2023 / 中国大陆 / 剧情 爱情 奇幻 古装 / 知竹 / 张雅钦 郭迦南', 'src': 'https://img3.doubanio.com/view/photo/m_ratio_poster/public/p2893594852.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/36155049', 'score': '8.6'}, {'name': '宁安如梦', 'subtitle': '2023 / 中国大陆 / 剧情 古装 / 朱锐斌 / 白鹿 张凌赫', 'src': 'https://img2.doubanio.com/view/photo/m_ratio_poster/public/p2892072141.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/35465016', 'score': '6.8'}, {'name': '花儿与少年·丝路季', 'subtitle': '2023 / 中国大陆 / 真人秀 / 李超 / 秦海璐 秦岚', 'src': 'https://img1.doubanio.com/view/photo/m_ratio_poster/public/p2900797398.jpg', 'href': 'https://www.douban.com/doubanapp/dispatch?uri=/tv/36247293', 'score': '8.4'}]\n"
     ]
    }
   ],
   "source": [
    "def _first_text(parent, xpath):\n",
    "    \"\"\"Return the text of the first element matching xpath under parent.\n",
    "\n",
    "    Returns an empty string when nothing matches, instead of raising the way\n",
    "    find_element does.\n",
    "    \"\"\"\n",
    "    matches = parent.find_elements(By.XPATH, xpath)\n",
    "    return matches[0].text if matches else \"\"\n",
    "\n",
    "\n",
    "# Build one record per list entry.\n",
    "data_arr = []\n",
    "for elem_data in elem_list:\n",
    "    # Link, poster, name and subtitle stay strict lookups, so a change in the\n",
    "    # page layout still fails loudly rather than yielding blank records.\n",
    "    elem_a = elem_data.find_element(By.XPATH, \"a\")\n",
    "    elem_img = elem_data.find_element(By.XPATH, \"a/div/div[1]/div/img\")\n",
    "    elem_name = elem_data.find_element(By.XPATH, \"a/div/div[2]/div/div[1]/span\")\n",
    "    elem_subtitle = elem_data.find_element(By.XPATH, \"a/div/div[2]/div/div[1]/div\")\n",
    "    data_arr.append({\n",
    "        \"name\": elem_name.text,\n",
    "        \"subtitle\": elem_subtitle.text,\n",
    "        \"src\": elem_img.get_attribute(\"src\"),\n",
    "        \"href\": elem_a.get_attribute(\"href\"),\n",
    "        # The score is looked up leniently: with find_element, a single entry\n",
    "        # lacking this span would raise NoSuchElementException and abort the\n",
    "        # loop, discarding every record collected so far. A missing score\n",
    "        # costs up to the implicit-wait timeout before \"\" is returned.\n",
    "        \"score\": _first_text(elem_data, \"a/div/div[2]/div/div[2]/span[2]\"),\n",
    "    })\n",
    "print(data_arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "d8d4bccc-b849-4d03-8a75-ad7977e099c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# quit() ends the whole WebDriver session and shuts down the driver process;\n",
    "# close() only closes the current window and can leave the driver running.\n",
    "driver.quit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "8b6d1f35-2ac8-40e2-8177-4778a9ef4f7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Turn the scraped records into a table and save it as an Excel workbook.\n",
    "data = pd.DataFrame(data=data_arr)\n",
    "data.to_excel(excel_writer=\"douban_tv_data.xlsx\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
